###################################################################################################
## Payson (2018) "Cities in the Statehouse" Replication Code
## California Analysis (Table 2 and Figures 2 - 7)
## Created with R version 3.4.1 (2017-06-30) -- "Single Candle"
###################################################################################################

## Set working directory
setwd()

## Load packages 
library(ggplot2); library(plyr); library(plm); library(lmtest); library(effects); 
library(stargazer); library(sandwich); library(RColorBrewer); library(reshape);
library(statar); library(xtable)


## Functions for tables (Note: tables will not compile without these functions)
pretty <- function(x) {
  paste(prettyNum(x, big.mark = ","))
}

clusters <- function(x) {
  length(fixef(x))
}

N <- function(x) {
  dim(x$model)[1]
}


###################################################################################################
## Load California city lobbying panel data

load("ca_lobby2002-2015.RData")


###################################################################################################
## Figure 2: Parallel Trends: Time Until Lobbying and City Revenue from State
###################################################################################################

## Create control group of non-lobbying cities 
never.lobby <- subset(dta, lobby.category == "Never")

## Get median transfers for non-lobbying cities in each year
never.lobby.transfers <- ddply(never.lobby, .(year), summarize,
                               control.state.pp = median(state.pp, na.rm = T))

## Merge non-lobbying median transfers values to original data by year (e.g. each observation in
## 2002 gets a control value of 162.95, the median transfers to non-lobbying cities in that year)
test <- merge(dta, never.lobby.transfers, by = "year", all.x = T)
dim(test)[1] == dim(dta)[1]
dta1 <- test

## Set scaled year levels
dta1$scaled.year <- factor(dta1$scaled.year, 
                           levels = c("year.lag3", "year.lag2", "year.lag1", "year0", "year1"))

## Subset to sample of cities that follow pattern of not lobbying for 3 years followed by at least
## 1 year of lobbying (E.g. any observation with a scaled year value)

sample <- subset(dta1, !is.na(scaled.year))


## For each scaled year value, calculate the average transfer for cities that start lobbying in
## year 0 and average transfers for the synthetic control set of non-lobbying cities
average.transfers <- ddply(sample, .(scaled.year), summarize,
                           treat.state.pp = mean(state.pp, na.rm = T),
                           control.state.pp = mean(control.state.pp, na.rm = T))


## Transform for plotting
average.transfers1 <- melt(average.transfers, id = "scaled.year")


## Add numeric values for scaled year variable for plotting
average.transfers1$year <- rep(c(-3, -2, -1, 0, 1), 2)
levels(average.transfers1[,2]) <- c("Lobbying Cities", "Non-Lobbying Cities")
average.transfers1


## Plot average transfers for lobbying and non-lobbying ciites by scaled year
## Figure 2: Parallel Trends: Time Until Lobbying and City Revenue from State
ggplot(average.transfers1, aes(year, value, linetype = variable)) +
  geom_point() + geom_line() +
  scale_linetype_discrete(name = "") +
  geom_vline(xintercept = -.5, linetype = 2) +
  theme_bw(base_family = "serif", base_size = 14) +
  theme(plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(fill = "white"),
        legend.position = c(.18, .2)) +
  labs(x = "Years Until Treatment", y = "State Transfers Per Capita ($)") +
  annotate("text", x = .1, y = 100, label = "First Year Lobbying", family = "serif", size = 4)



###################################################################################################
## Table 2: City Lobbying and State Revenue Transfers in California
###################################################################################################

load("ca_lobby2002-2015.RData")

## Load plm package. Format panel data for analysis.
pdta <- pdata.frame(dta, index=c("city", "year"))

## Column 1
m1 <- plm(state.pp1 ~ lobby + year, data = pdta, model = "within")
coef1 <- coeftest(m1, vcov = vcovHC(m1, method = "arellano", type = "HC1", cluster = "group"))

## Column 2
m2 <- plm(state.pp1 ~ lobby + log.pop + log.taxes + year, data = pdta, model = "within")
coef2 <- coeftest(m2, vcov = vcovHC(m2, method = "arellano", type = "HC1", cluster = "group"))

## Column 3
m3 <- plm(state.pp1 ~ lobby + log.pop + log.taxes + log.liabilities + year, 
          data = pdta, model = "within")
coef3 <- coeftest(m3, vcov = vcovHC(m3, method = "arellano", type = "HC1", cluster = "group"))

## Column 4 (model takes a while to run)
m4 <- plm(state.pp1 ~ lobby + log.pop + log.taxes + log.liabilities + year + city*trend,
          data = pdta, model = "within")
coef4 <- coeftest(m4, vcov = vcovHC(m4, method = "arellano", type = "HC1", cluster = "group"))

## Column 5 (model takes a while to run)
m5 <- plm(state.pp1 ~ lobby + log.pop + log.taxes + log.liabilities + log.trans + log.utilities + 
            log.pubsafety + log.health + log.commdev + log.parks + log.mean.inc + white.pct + 
            poverty + republican + year + city*trend, data = pdta, model = "within")
coef5 <- coeftest(m5, vcov = vcovHC(m5, method = "arellano", type = "HC1", cluster = "group"))


## Table 2: City Lobbying and State Revenue Transfers in California
## Note: be sure to load functions at start of replication code
stargazer(coef1, coef2, coef3, coef4, coef5, type = "text", star.cutoffs = c(0.05),
          title = c("City Lobbying and State Revenue Transfers in California."),
          dep.var.caption = "State Transfers Per Capita, t + 1", dep.var.labels.include = FALSE, 
          keep = c("lobby", "log.pop", "log.taxes", "log.liabilities"),
          covariate.labels = c("Lobby, t", "Population (Log), t",
                               "Tax Revenue (Log), t", "Total Liabilities (Log), t"),
          add.lines = list(c("City FE", "Y", "Y", "Y", "Y", "Y"), 
                           c("Year FE", "Y", "Y", "Y", "Y", "Y"),
                           c("City Linear Trend", "", "", "", "Y", "Y"),
                           c("Full Controls", "", "", "", "", "Y"),
                           c("Observations", pretty(N(m1)), pretty(N(m2)),
                             pretty(N(m3)), pretty(N(m4)), pretty(N(m5))),
                           c("# Cities", pretty(clusters(m1)), pretty(clusters(m2)),
                             pretty(clusters(m3)), pretty(clusters(m4)), pretty(clusters(m5))),
                           c("Mean Transfers", round(mean(m1$model$state.pp1)),
                             round(mean(m2$model$state.pp1)),
                             round(mean(m3$model$state.pp1)),
                             round(mean(m4$model$state.pp1)),
                             round(mean(m4$model$state.pp1)))),                        
          notes = paste("Robust standard errors clustered by city. *p<0.05"),
          notes.append = FALSE, notes.label = "")	



###################################################################################################
## Figure 3: City Lobbying and Revenue Transfers from State: Lags and Leads
###################################################################################################

load("ca_lobby2002-2015.RData")

## Load plm package. Format panel data for analysis.
pdta <- pdata.frame(dta, index=c("city", "year"))

## Model for figure
lags.leads <- plm(state.pp1 ~ lobby.lag1 + lobby + lobby.lead1 + lobby.lead2 +
                    lobby.lead3 + log.pop + log.taxes + year, 
                  data = pdta, model = "within")
coef <- coeftest(lags.leads, vcov = vcovHC(lags.leads, method = "arellano", 
                                            type = "HC1", cluster = "group"))


## Format data for figure
beta <- c(coef[1, 1], coef[2, 1], coef[3, 1], coef[4, 1], coef[5, 1])
se <- c(coef[1, 2], coef[2, 2], coef[3, 2], coef[4, 2], coef[5,2])
figdata <- data.frame(beta = c(beta), se = c(se), x = c(1, 0, -1, -2, -3))


## Figure 3: City Lobbying and Revenue Transfers from State: Lags and Leads
ggplot(figdata, aes(x = x, y = beta)) + 
    geom_point(size = 2) +
    geom_errorbar(aes(ymin = beta - (1.9 * se), ymax = beta + (1.9 * se)), 
                  size = .5, width = .1) +
    geom_hline(yintercept = 0, linetype = 2) +
    scale_x_continuous(breaks = c(-3, -2, -1, 0, 1), 
                       labels = c("3 Years Prior", "2 Years Prior", "1 Year Prior", 
                                  "Start Lobbying", "1 Year After")) +
    geom_vline(xintercept = -.5) +
    theme_bw(base_family = "serif", base_size = 14)  +
    labs(y = "Effect on State Transfers Per Capita, t +1", 
         x = "Years Relative to Treatment")



###################################################################################################
## Figure 4: Lobbying and State Transfers by City Revenue Tercile
###################################################################################################

load("ca_lobby2002-2015.RData")

## Create function for clustered standard errors
cl <- function(m){
  M <- length(unique(m$model$city))
  N <- length(m$model$city)
  K <- m$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(m),2, function(x) tapply(x, m$model$city, sum))
  vcovCL <- dfc*sandwich(m, meat=crossprod(uj)/N)
  vcovCL}


## Convert lobby to factor for "effects" package
dta$lobby <- factor(dta$lobby)


## Specify model
m <- lm(state.pp1 ~ lobby*rev.tercile + log.pop + year + city, data = dta)


## Use "effects" package to estimate marginal effect of lobbying by revenue tercile
plot.dta <- data.frame(tercile = c(rep("Bottom", 2), rep("Middle", 2), rep("Top", 2)),
                       lower = c(summary(effect("lobby*rev.tercile", m, vcov. = cl))$lower[,1],
                                 summary(effect("lobby*rev.tercile", m, vcov. = cl))$lower[,2],
                                 summary(effect("lobby*rev.tercile", m, vcov. = cl))$lower[,3]),
                       upper = c(summary(effect("lobby*rev.tercile", m, vcov. = cl))$upper[,1],
                                 summary(effect("lobby*rev.tercile", m, vcov. = cl))$upper[,2],
                                 summary(effect("lobby*rev.tercile", m, vcov. = cl))$upper[,3]),
                       effect = c(summary(effect("lobby*rev.tercile", m, vcov. = cl))$effect[,1],
                                  summary(effect("lobby*rev.tercile", m, vcov. = cl))$effect[,2],
                                  summary(effect("lobby*rev.tercile", m, vcov. = cl))$effect[,3]),
                       Lobby = rep(c("No Lobby", "Lobby"), 3))


## Set up dataframe for plotting
plot.dta$Lobby <- factor(plot.dta$Lobby, levels = c("No Lobby", "Lobby"))
plot.dta$none <- ifelse(plot.dta$Lobby == "No Lobby", plot.dta$effect, NA)
plot.dta$none.lower <- ifelse(plot.dta$Lobby == "No Lobby", plot.dta$lower, NA)
plot.dta$none.upper <- ifelse(plot.dta$Lobby == "No Lobby", plot.dta$upper, NA)

dodge <- position_dodge(width = 0.5)

## Figure 4: Lobbying and State Transfers by City Revenue Tercile
ggplot(plot.dta, aes(tercile, effect, fill = Lobby)) +
  geom_bar(position = "dodge", stat = "identity", width = .5) +
  geom_errorbar(aes(ymin = lower, ymax = upper), position = dodge, width = 0.1) +
  ylim(0, 140) +
  scale_fill_grey(name = "Lobby Decision, t", start = .8, end = .2) +
  theme_bw(base_family = "serif", base_size = 14) +
  theme(plot.background = element_blank(),
        strip.background = element_rect(fill = "white")) +
  labs(y = "State Transfers Per Capita, t + 1", x = "City Revenue Tercile")



###################################################################################################
## Figure 5: Effect of City Lobbying Effort on State Transfers
###################################################################################################

load("ca_lobby2002-2015.RData")

## Create function for clustered standard errors
cl <- function(m){
  M <- length(unique(m$model$city))
  N <- length(m$model$city)
  K <- m$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(m),2, function(x) tapply(x, m$model$city, sum))
  vcovCL <- dfc*sandwich(m, meat=crossprod(uj)/N)
  vcovCL}


## Specify model
m <- lm(state.pp1 ~ lobby.effort*rev.tercile + log.pop + year + city, data = dta)


## Use "effects" package to estimate marginal effect of lobbying effort by revenue tercile
plot.dta <- data.frame(tercile = c(rep("Bottom", 3), rep("Middle", 3), rep("Top", 3)),
                       lower = c(summary(effect("lobby.effort*rev.tercile", m, vcov. = cl))$lower[,1],
                                 summary(effect("lobby.effort*rev.tercile", m, vcov. = cl))$lower[,2],
                                 summary(effect("lobby.effort*rev.tercile", m, vcov. = cl))$lower[,3]),
                       upper = c(summary(effect("lobby.effort*rev.tercile", m, vcov. = cl))$upper[,1],
                                 summary(effect("lobby.effort*rev.tercile", m, vcov. = cl))$upper[,2],
                                 summary(effect("lobby.effort*rev.tercile", m, vcov. = cl))$upper[,3]),
                       effect = c(summary(effect("lobby.effort*rev.tercile", m, vcov. = cl))$effect[,1],
                                  summary(effect("lobby.effort*rev.tercile", m, vcov. = cl))$effect[,2],
                                  summary(effect("lobby.effort*rev.tercile", m, vcov. = cl))$effect[,3]),
                       Effort = rep(c("No Lobby", "Any Lobby", "100K Lobby"), 3))

plot.dta$Effort <- factor(plot.dta$Effort, levels = c("No Lobby", "Any Lobby", "100K Lobby"))

dodge <- position_dodge(width = 0.5)
limits <- aes(ymin = lower, ymax = upper)

## Figure 5: Effect of City Lobbying Effort on State Transfers
ggplot(plot.dta, aes(tercile, effect, fill = Effort)) +
  geom_bar(position = "dodge", stat = "identity", width = .5) +
  geom_errorbar(limits, position = dodge, width = 0.1) +
  scale_fill_grey(name = "Lobby Effort, t", start = .8, end = .2) +
  theme_bw(base_family = "serif", base_size = 14) +
  theme(plot.background = element_blank(),
        strip.background = element_rect(fill = "white")) +
  labs(y = "State Transfers Per Capita, t + 1", x = "City Revenue Tercile")



###################################################################################################
## Figures 6 and 7: Correlates of City Revenue Per Capita
###################################################################################################

load("ca_lobby2002-2015.RData")

## Get average values of variables by city (requires plyr package)
city <- ddply(dta, .(city), summarize,
              log.own.rev.pp = mean(log.own.rev.pp, na.rm = T),
              log.mean.inc = mean(log.mean.inc, na.rm = T),
              log.med.home = mean(log.med.home, na.rm = T),
              poverty = mean(poverty, na.rm = T),
              white.pct = mean(white.pct, na.rm = T),
              turnout2012 = mean(turnout2012, na.rm = T))


## Figure 6A: Correlates of City Revenue Per Capita (Mean Family Income)
ggplot(city, aes(x = log.own.rev.pp, y = log.mean.inc)) +
  stat_summary_bin(fun.y = mean, bins = 100, geom = "point") +
  stat_smooth(method = "lm", color = "black") +
  theme_bw(base_family = "serif", base_size = 14) +
  theme(plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(fill = "white")) +
  annotate("text", label = "N = 467 \n Bins = 100", x = 9, y = 10.7, size = 3.5) +
  labs(y = "Mean Family Income (Log)", x = "City Own Source Revenue \n Per Person (Log)")


## Figure 6B: Correlates of City Revenue Per Capita (Median Home Value)
ggplot(city, aes(log.own.rev.pp, log.med.home)) +
  stat_summary_bin(fun.y = mean, bins = 100, geom = "point") +
  stat_smooth(method = "lm", color = "black") +
  theme_bw(base_family = "serif", base_size = 14) +
  theme(plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(fill = "white")) +
  labs(y = "Median Home Value (Log)", x = "City Own Source Revenue \n Per Person (Log)") +
  annotate("text", label = "N = 467 \n Bins = 100", x = 9, y = 11.85, size = 3.5)


## Figure 6C: Correlates of City Revenue Per Capita (Percent Poverty)
ggplot(city, aes(log.own.rev.pp, poverty)) +
  stat_summary_bin(fun.y = mean, bins = 100, geom = "point") +
  stat_smooth(method = "lm", color = "black") +
  theme_bw(base_family = "serif", base_size = 14) +
  theme(plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(fill = "white")) +
  labs(y = "Percent Poverty", x = "City Own Source Revenue \n Per Person (Log)") +
  annotate("text", label = "N = 467 \n Bins = 100", x = 9, y = 31, size = 3.5)


## Figure 6D: Correlates of City Revenue Per Capita (Percent White)
ggplot(city, aes(log.own.rev.pp, white.pct)) +
  stat_summary_bin(fun.y = mean, bins = 100, geom = "point") +
  stat_smooth(method = "lm", color = "black") +
  theme_bw(base_family = "serif", base_size = 14) +
  theme(plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(fill = "white")) +
  labs(y = "Percent White", x = "City Own Source Revenue \n Per Person (Log)") +
  annotate("text", label = "N = 467 \n Bins = 100", x = 8.8, y = 9, size = 3.5)


## Figure 7: City Revenue Per Capita and Voter Turnout, 2012
ggplot(city, aes(log.own.rev.pp, turnout2012)) +
  stat_summary_bin(fun.y = mean, bins = 100, geom = "point") +
  stat_smooth(method = "lm", color = "black") +
  theme_bw(base_family = "serif", base_size = 14) +
  theme(plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_rect(fill = "white")) +
  labs(y = "Turnout (Among Registered Voters)", x = "City Own Source Revenue \n Per Person (Log)") +
  annotate("text", label = "N = 467 \n Bins = 100", x = 9.2, y = .575, size = 3.5)
